library(rio)
library(survey)
library(srvyr)
library(tidyverse)

##############################
#Toplines
###############################
  
# Read the topline spreadsheet, drop the `dk` and `mixed_neither` columns,
# and spread `tooextreme` into one column per value of `party`.
toplines <- "./Data/toplines.xlsx" %>%
  import() %>%
  select(-c(dk, mixed_neither)) %>%
  pivot_wider(names_from = party, values_from = tooextreme)


#################################
#Pew/CNN
##################################

# Load the combined Pew/CNN file and store `sdate` as a Date.
combdata <- import("./Data/pew_cnn_comb.Rda")
combdata$sdate <- as.Date(combdata$sdate)

####### Declare Survey Design
# Rows with a missing `weight` are dropped before the design is declared.
# `ids = 1` declares no clustering; `weight` supplies the sampling weights.
comb_design <- srvyr::as_survey_design(
  filter(combdata, !is.na(weight)),
  ids = 1,
  weights = weight
)

#######By PID Extremity
# For each survey date, estimate the weighted percentage of respondents in
# each `too_extreme1` category and reshape to one row per date with one
# column per category ("Neither", "One Party", "Both Parties").
means <- comb_design %>%
  mutate(
    # Codes 1/2/3 receive labels; any other code becomes NA in the factor
    # and is removed by the filter below.
    too_extreme1 = factor(
      too_extreme1,
      levels = c(1, 2, 3),
      labels = c("Neither", "One Party", "Both Parties")
    )
  ) %>%
  filter(!is.na(too_extreme1)) %>%
  group_by(sdate, too_extreme1) %>%
  # survey_mean() is called without a variable, so it estimates the share of
  # each `too_extreme1` level within `sdate` (see srvyr docs).
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  summarize(proportion = srvyr::survey_mean(na.rm = TRUE)) %>%
  # Drop any grouping left over from summarize() so `means` is a plain table.
  ungroup() %>%
  mutate(perc = proportion * 100) %>%
  select(-proportion, -proportion_se) %>%
  pivot_wider(
    names_from = "too_extreme1",
    values_from = "perc"
  ) %>%
  rename(date = sdate)


# Attach the survey-based percentages to the toplines by `date`, keeping every
# topline row, and sort the result by date.
# Microlevel data is not available for the 1999 CNN Survey.
merged <- toplines %>%
  left_join(means, by = "date") %>%
  arrange(date)

########################
#Adding in Lucid
########################
# Load the Lucid data and print a frequency table of `too_extreme_1`.
lucid <- import("./Data/lucid_coded_all.rda")
summarytools::freq(lucid$too_extreme_1)

# Display row 20 of `merged`, then overwrite columns 5-7 of that row with
# hard-coded values.
# NOTE(review): presumably these are the Lucid percentages read off the
# freq() output above, and columns 5-7 are `Neither`, `One Party`,
# `Both Parties` -- confirm, since the row/column positions depend on the
# current shape and ordering of `merged`.
merged[20,]
merged[20, 5] <- 26.05
merged[20, 6] <- 60.92
merged[20, 7] <- 13.03

# Round the three percentage columns to whole numbers and store `date` as a
# Date.
merged1 <- merged %>%
  mutate(
    across(
      c(`Neither`, `One Party`, `Both Parties`),
      function(pct) round(pct, 0)
    ),
    date = as.Date(date)
  )

# Write the final table to CSV.
export(merged1, "./Data/survey_toplines.csv")

